{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 单因子数据分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import scipy.stats as ss\n",
    "df = pd.read_csv('./data/HR.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据集探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>satisfaction_level</th>\n",
       "      <th>last_evaluation</th>\n",
       "      <th>number_project</th>\n",
       "      <th>average_monthly_hours</th>\n",
       "      <th>time_spend_company</th>\n",
       "      <th>Work_accident</th>\n",
       "      <th>left</th>\n",
       "      <th>promotion_last_5years</th>\n",
       "      <th>department</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.53</td>\n",
       "      <td>2</td>\n",
       "      <td>157</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>5</td>\n",
       "      <td>262</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0.11</td>\n",
       "      <td>0.88</td>\n",
       "      <td>7</td>\n",
       "      <td>272</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0.72</td>\n",
       "      <td>0.87</td>\n",
       "      <td>5</td>\n",
       "      <td>223</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0.37</td>\n",
       "      <td>0.52</td>\n",
       "      <td>2</td>\n",
       "      <td>159</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   satisfaction_level  last_evaluation  number_project  average_monthly_hours  \\\n",
       "0                0.38             0.53               2                    157   \n",
       "1                0.80             0.86               5                    262   \n",
       "2                0.11             0.88               7                    272   \n",
       "3                0.72             0.87               5                    223   \n",
       "4                0.37             0.52               2                    159   \n",
       "\n",
       "   time_spend_company  Work_accident  left  promotion_last_5years department  \\\n",
       "0                   3              0     1                      0      sales   \n",
       "1                   6              0     1                      0      sales   \n",
       "2                   4              0     1                      0      sales   \n",
       "3                   5              0     1                      0      sales   \n",
       "4                   3              0     1                      0      sales   \n",
       "\n",
       "   salary  \n",
       "0     low  \n",
       "1  medium  \n",
       "2  medium  \n",
       "3     low  \n",
       "4     low  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(df['satisfaction_level'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level         0.612839\n",
       "last_evaluation           67.373732\n",
       "number_project             3.802693\n",
       "average_monthly_hours    201.041728\n",
       "time_spend_company         3.498067\n",
       "Work_accident              0.144581\n",
       "left                       0.238235\n",
       "promotion_last_5years      0.021264\n",
       "dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 均值\n",
    "df.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level         0.64\n",
       "last_evaluation            0.72\n",
       "number_project             4.00\n",
       "average_monthly_hours    200.00\n",
       "time_spend_company         3.00\n",
       "Work_accident              0.00\n",
       "left                       0.00\n",
       "promotion_last_5years      0.00\n",
       "dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 中位数\n",
    "df.median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level         0.64\n",
       "last_evaluation            0.72\n",
       "number_project             4.00\n",
       "average_monthly_hours    200.00\n",
       "time_spend_company         3.00\n",
       "Work_accident              0.00\n",
       "left                       0.00\n",
       "promotion_last_5years      0.00\n",
       "Name: 0.5, dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 中分位数-即中位数\n",
    "df.quantile()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level         0.44\n",
       "last_evaluation            0.56\n",
       "number_project             3.00\n",
       "average_monthly_hours    156.00\n",
       "time_spend_company         3.00\n",
       "Work_accident              0.00\n",
       "left                       0.00\n",
       "promotion_last_5years      0.00\n",
       "Name: 0.25, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 下四分位数\n",
    "df.quantile(q=0.25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>satisfaction_level</th>\n",
       "      <th>last_evaluation</th>\n",
       "      <th>number_project</th>\n",
       "      <th>average_monthly_hours</th>\n",
       "      <th>time_spend_company</th>\n",
       "      <th>Work_accident</th>\n",
       "      <th>left</th>\n",
       "      <th>promotion_last_5years</th>\n",
       "      <th>department</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.55</td>\n",
       "      <td>4.0</td>\n",
       "      <td>135</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>156</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   satisfaction_level  last_evaluation  number_project  average_monthly_hours  \\\n",
       "0                 0.1             0.55             4.0                    135   \n",
       "1                 NaN              NaN             NaN                    156   \n",
       "\n",
       "   time_spend_company  Work_accident  left  promotion_last_5years department  \\\n",
       "0                 3.0            0.0   0.0                    0.0      sales   \n",
       "1                 NaN            NaN   NaN                    NaN        NaN   \n",
       "\n",
       "  salary  \n",
       "0    low  \n",
       "1    NaN  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 众数\n",
    "df.mode()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level          0.248623\n",
       "last_evaluation          8164.407524\n",
       "number_project              1.232733\n",
       "average_monthly_hours      49.941815\n",
       "time_spend_company          1.460053\n",
       "Work_accident               0.351689\n",
       "left                        0.426018\n",
       "promotion_last_5years       0.144267\n",
       "dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 标准差\n",
    "df.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level       6.181359e-02\n",
       "last_evaluation          6.665755e+07\n",
       "number_project           1.519630e+00\n",
       "average_monthly_hours    2.494185e+03\n",
       "time_spend_company       2.131754e+00\n",
       "Work_accident            1.236854e-01\n",
       "left                     1.814911e-01\n",
       "promotion_last_5years    2.081307e-02\n",
       "dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 方差\n",
    "df.var()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level                                                 9192.59\n",
       "last_evaluation                                                1.01074e+06\n",
       "number_project                                                       57048\n",
       "average_monthly_hours                                              3016028\n",
       "time_spend_company                                                   52478\n",
       "Work_accident                                                         2169\n",
       "left                                                                  3574\n",
       "promotion_last_5years                                                  319\n",
       "department               salessalessalessalessalessalessalessalessaless...\n",
       "salary                   lowmediummediumlowlowlowlowlowlowlowlowlowlowl...\n",
       "dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 求和\n",
    "df.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level        -0.476438\n",
       "last_evaluation          122.482652\n",
       "number_project             0.337774\n",
       "average_monthly_hours      0.053225\n",
       "time_spend_company         1.853530\n",
       "Work_accident              2.021481\n",
       "left                       1.229057\n",
       "promotion_last_5years      6.637677\n",
       "dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 偏态系数\n",
    "df.skew()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "satisfaction_level          -0.670696\n",
       "last_evaluation          15001.999987\n",
       "number_project              -0.495810\n",
       "average_monthly_hours       -1.135016\n",
       "time_spend_company           4.774353\n",
       "Work_accident                2.086664\n",
       "left                        -0.489485\n",
       "promotion_last_5years       42.064357\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 峰态系数\n",
    "df.kurt()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 正态分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 生成正态分布\n",
    "zt = ss.norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<scipy.stats._continuous_distns.norm_gen at 0x16967cbce48>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "zt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array(0.), array(1.), array(0.), array(0.))"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 正态分布的指标：m-均值，v-方差，s-偏态系数，k-峰态系数\n",
    "zt.stats(moments='mvsk')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3989422804014327"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 指定横坐标，返回纵坐标值\n",
    "zt.pdf(0.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.2815515655446004"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 指定0-1之间的数，表示负无穷到这个值的累积值，就是上侧分位数\n",
    "zt.ppf(0.9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9772498680518208"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 从负无穷到2的累积概率\n",
    "zt.cdf(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9544997361036416"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 正两倍的标准差-负两倍的标准差=所得的中间的累计概率\n",
    "zt.cdf(2)-zt.cdf(-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.58132524, -0.85269351, -0.4265185 ,  0.74607319, -1.75447628,\n",
       "       -0.6633306 ,  1.85555626,  0.38276054,  0.05529418,  0.40815465])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 得到size大小符合正态分布的数字\n",
    "zt.rvs(size=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 卡方分布 | t分布 | f分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "chi = ss.chi2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<scipy.stats._continuous_distns.chi2_gen at 0x16967ca3a58>"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = ss.t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<scipy.stats._continuous_distns.t_gen at 0x169678cd8d0>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "f = ss.f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<scipy.stats._continuous_distns.f_gen at 0x16967c58390>"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 抽样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>satisfaction_level</th>\n",
       "      <th>last_evaluation</th>\n",
       "      <th>number_project</th>\n",
       "      <th>average_monthly_hours</th>\n",
       "      <th>time_spend_company</th>\n",
       "      <th>Work_accident</th>\n",
       "      <th>left</th>\n",
       "      <th>promotion_last_5years</th>\n",
       "      <th>department</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>7400</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0.36</td>\n",
       "      <td>4</td>\n",
       "      <td>253</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>RandD</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6890</td>\n",
       "      <td>0.52</td>\n",
       "      <td>0.82</td>\n",
       "      <td>2</td>\n",
       "      <td>242</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6663</td>\n",
       "      <td>0.81</td>\n",
       "      <td>0.96</td>\n",
       "      <td>3</td>\n",
       "      <td>226</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4992</td>\n",
       "      <td>0.55</td>\n",
       "      <td>0.91</td>\n",
       "      <td>4</td>\n",
       "      <td>187</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>hr</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10296</td>\n",
       "      <td>0.74</td>\n",
       "      <td>0.74</td>\n",
       "      <td>2</td>\n",
       "      <td>254</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>product_mng</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2558</td>\n",
       "      <td>0.53</td>\n",
       "      <td>0.73</td>\n",
       "      <td>4</td>\n",
       "      <td>248</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>technical</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11272</td>\n",
       "      <td>0.66</td>\n",
       "      <td>0.57</td>\n",
       "      <td>4</td>\n",
       "      <td>161</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>management</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>488</td>\n",
       "      <td>0.10</td>\n",
       "      <td>0.83</td>\n",
       "      <td>7</td>\n",
       "      <td>276</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>accounting</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12098</td>\n",
       "      <td>0.11</td>\n",
       "      <td>0.97</td>\n",
       "      <td>6</td>\n",
       "      <td>284</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5958</td>\n",
       "      <td>0.97</td>\n",
       "      <td>0.61</td>\n",
       "      <td>3</td>\n",
       "      <td>208</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>technical</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       satisfaction_level  last_evaluation  number_project  \\\n",
       "7400                 0.73             0.36               4   \n",
       "6890                 0.52             0.82               2   \n",
       "6663                 0.81             0.96               3   \n",
       "4992                 0.55             0.91               4   \n",
       "10296                0.74             0.74               2   \n",
       "2558                 0.53             0.73               4   \n",
       "11272                0.66             0.57               4   \n",
       "488                  0.10             0.83               7   \n",
       "12098                0.11             0.97               6   \n",
       "5958                 0.97             0.61               3   \n",
       "\n",
       "       average_monthly_hours  time_spend_company  Work_accident  left  \\\n",
       "7400                     253                   2              1     0   \n",
       "6890                     242                   3              0     0   \n",
       "6663                     226                   3              0     0   \n",
       "4992                     187                   4              1     0   \n",
       "10296                    254                   3              0     0   \n",
       "2558                     248                   2              0     0   \n",
       "11272                    161                   7              0     0   \n",
       "488                      276                   4              0     1   \n",
       "12098                    284                   4              0     1   \n",
       "5958                     208                   3              0     0   \n",
       "\n",
       "       promotion_last_5years   department  salary  \n",
       "7400                       0        RandD     low  \n",
       "6890                       0        sales  medium  \n",
       "6663                       0        sales     low  \n",
       "4992                       0           hr     low  \n",
       "10296                      0  product_mng     low  \n",
       "2558                       0    technical  medium  \n",
       "11272                      1   management  medium  \n",
       "488                        0   accounting     low  \n",
       "12098                      0        sales     low  \n",
       "5958                       0    technical  medium  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 抽10个样本\n",
    "df.sample(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>satisfaction_level</th>\n",
       "      <th>last_evaluation</th>\n",
       "      <th>number_project</th>\n",
       "      <th>average_monthly_hours</th>\n",
       "      <th>time_spend_company</th>\n",
       "      <th>Work_accident</th>\n",
       "      <th>left</th>\n",
       "      <th>promotion_last_5years</th>\n",
       "      <th>department</th>\n",
       "      <th>salary</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>6670</td>\n",
       "      <td>0.89</td>\n",
       "      <td>0.46</td>\n",
       "      <td>4</td>\n",
       "      <td>248</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>hr</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7121</td>\n",
       "      <td>0.93</td>\n",
       "      <td>1.00</td>\n",
       "      <td>3</td>\n",
       "      <td>148</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9934</td>\n",
       "      <td>0.58</td>\n",
       "      <td>0.50</td>\n",
       "      <td>5</td>\n",
       "      <td>184</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8249</td>\n",
       "      <td>0.82</td>\n",
       "      <td>0.71</td>\n",
       "      <td>5</td>\n",
       "      <td>208</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>management</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>11370</td>\n",
       "      <td>0.21</td>\n",
       "      <td>0.43</td>\n",
       "      <td>5</td>\n",
       "      <td>175</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>high</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2275</td>\n",
       "      <td>0.94</td>\n",
       "      <td>0.74</td>\n",
       "      <td>5</td>\n",
       "      <td>171</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>IT</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8288</td>\n",
       "      <td>0.86</td>\n",
       "      <td>0.51</td>\n",
       "      <td>3</td>\n",
       "      <td>185</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>technical</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>12383</td>\n",
       "      <td>0.38</td>\n",
       "      <td>0.51</td>\n",
       "      <td>2</td>\n",
       "      <td>159</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>10622</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.49</td>\n",
       "      <td>3</td>\n",
       "      <td>239</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7712</td>\n",
       "      <td>0.60</td>\n",
       "      <td>0.86</td>\n",
       "      <td>6</td>\n",
       "      <td>272</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>technical</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7209</td>\n",
       "      <td>0.87</td>\n",
       "      <td>0.98</td>\n",
       "      <td>3</td>\n",
       "      <td>174</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>hr</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3775</td>\n",
       "      <td>0.70</td>\n",
       "      <td>0.71</td>\n",
       "      <td>5</td>\n",
       "      <td>269</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>technical</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5705</td>\n",
       "      <td>0.90</td>\n",
       "      <td>0.58</td>\n",
       "      <td>5</td>\n",
       "      <td>260</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>support</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8495</td>\n",
       "      <td>0.95</td>\n",
       "      <td>0.62</td>\n",
       "      <td>4</td>\n",
       "      <td>255</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14222</td>\n",
       "      <td>0.11</td>\n",
       "      <td>0.81</td>\n",
       "      <td>6</td>\n",
       "      <td>305</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>sales</td>\n",
       "      <td>low</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       satisfaction_level  last_evaluation  number_project  \\\n",
       "6670                 0.89             0.46               4   \n",
       "7121                 0.93             1.00               3   \n",
       "9934                 0.58             0.50               5   \n",
       "8249                 0.82             0.71               5   \n",
       "11370                0.21             0.43               5   \n",
       "2275                 0.94             0.74               5   \n",
       "8288                 0.86             0.51               3   \n",
       "12383                0.38             0.51               2   \n",
       "10622                0.60             0.49               3   \n",
       "7712                 0.60             0.86               6   \n",
       "7209                 0.87             0.98               3   \n",
       "3775                 0.70             0.71               5   \n",
       "5705                 0.90             0.58               5   \n",
       "8495                 0.95             0.62               4   \n",
       "14222                0.11             0.81               6   \n",
       "\n",
       "       average_monthly_hours  time_spend_company  Work_accident  left  \\\n",
       "6670                     248                   5              1     0   \n",
       "7121                     148                   4              0     0   \n",
       "9934                     184                   4              0     0   \n",
       "8249                     208                   2              0     0   \n",
       "11370                    175                   2              1     0   \n",
       "2275                     171                   3              0     0   \n",
       "8288                     185                   2              0     0   \n",
       "12383                    159                   3              0     1   \n",
       "10622                    239                   2              0     0   \n",
       "7712                     272                   4              0     0   \n",
       "7209                     174                   3              0     0   \n",
       "3775                     269                   3              0     0   \n",
       "5705                     260                   2              0     0   \n",
       "8495                     255                   2              0     0   \n",
       "14222                    305                   4              0     1   \n",
       "\n",
       "       promotion_last_5years  department  salary  \n",
       "6670                       0          hr  medium  \n",
       "7121                       0       sales     low  \n",
       "9934                       0       sales  medium  \n",
       "8249                       0  management     low  \n",
       "11370                      0       sales    high  \n",
       "2275                       0          IT     low  \n",
       "8288                       0   technical  medium  \n",
       "12383                      0       sales  medium  \n",
       "10622                      0       sales  medium  \n",
       "7712                       0   technical     low  \n",
       "7209                       0          hr     low  \n",
       "3775                       0   technical     low  \n",
       "5705                       0     support  medium  \n",
       "8495                       0       sales  medium  \n",
       "14222                      0       sales     low  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 抽百分比为0.001的样本\n",
    "df.sample(frac=0.001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
